Purpose

Here we have an initial foray into plotting some of the covariate values, such as RNA-binding protein associations, with respect to the readout of partition coefficients. Since there are three partitions that are compositional, we can use ternary plots to represent the readout space.

Initialization

Libraries

library(magrittr)
library(tidyverse)
library(ggtern)
library(ggridges)
library(gtsummary)
library(readxl)
library(cowplot)

Parameters

# Fix the random seed for the session.
set.seed(20220602)

# Path of the serialized data frame loaded in the "Loading" section.
RDS_IN <- "data/df_tiger_clean.Rds"

# Colours keyed by category code. LOC_COLORS is used for ridge-plot fills and
# histogram fills; LOC_COLORS_DARK is used for point colours in the categorical
# ternary plots and differs only in its "TG" entry.
LOC_COLORS <- c(
  ER = "#4B74A6",
  TG = "#E4E4CA",
  CY = "#949494",
  DF = "#CCCCCC"
)
LOC_COLORS_DARK <- c(
  ER = "#4B74A6",
  TG = "#C9C497",
  CY = "#949494",
  DF = "#CCCCCC"
)

# NOTE(review): the MIN_PCON_* values are not referenced anywhere else in this
# document; presumably they are per-partition thresholds on the normalized
# coefficients and on their difference -- confirm before relying on them.
MIN_PCON_CY <- 1.15
MIN_PCON_ER <- 1.25
MIN_PCON_TG <- 1.30

MIN_PCON_DIFF <- 0.15

Data

Loading

# Read the serialized table from the path configured in RDS_IN; every analysis
# chunk below starts from df_tiger.
df_tiger <- readRDS(file = RDS_IN)

Analysis

Sanity Checks

Compositionality

First, validate that the partition coefficients for each gene sum to 1.

# Per-gene sum of the three partition coefficients, kept alongside the
# identifiers and the individual coefficients.
df_pcos <- df_tiger %>%
  mutate(pco_sum = pco_tg + pco_er + pco_cy) %>%
  select(gene_name, refseq_id, category, pco_sum, pco_tg, pco_er, pco_cy)

# Histogram of the sums: every gene in grey, overlaid in blue with the genes
# whose sum equals 1 within all.equal()'s numeric tolerance.
#
# all.equal() returns a character description (not FALSE) when the values
# differ, which makes a bare map_lgl(pco_sum, all.equal, current = 1) fail as
# soon as a single gene is off. Wrapping the call in isTRUE() yields a proper
# logical for every gene, so deviating genes simply stay grey.
ggplot(df_pcos, aes(x = pco_sum)) +
  geom_histogram(fill = "lightgrey", color = "black", binwidth = 0.005) +
  geom_histogram(
    data = filter(df_pcos, map_lgl(pco_sum, ~ isTRUE(all.equal(.x, 1)))),
    fill = "steelblue", color = "black", binwidth = 0.005
  ) +
  labs(x = "Sum of Partition Coefficients", y = "Genes") +
  scale_y_continuous(expand = c(0, 0, 0.1, 0)) +
  theme_light()

Seems to be correct!

Normalization

Normalized coefficients are derived by dividing by the medians. Let’s verify.

# Recompute the normalized coefficients (pcon_*) as raw coefficient divided by
# the column median, next to the reported normalized values (npco_*).
df_pcons <- df_tiger %>%
  select(
    gene_name, refseq_id, category,
    pco_cy, pco_er, pco_tg,
    npco_cy, npco_er, npco_tg
  ) %>%
  mutate(
    pcon_cy = pco_cy / median(pco_cy),
    pcon_er = pco_er / median(pco_er),
    pcon_tg = pco_tg / median(pco_tg)
  )

# Reported vs. recomputed, one partition at a time.
with(df_pcons, all.equal(npco_cy, pcon_cy))
## [1] "Mean relative difference: 1.519713e-08"
with(df_pcons, all.equal(npco_er, pcon_er))
## [1] "Mean relative difference: 0.0001565953"
with(df_pcons, all.equal(npco_tg, pcon_tg))
## [1] "Mean relative difference: 0.000138522"

Looks like there are some differences. Let’s check that these really are uniformly small, and not a few large differences.

# Five-number summaries of (reported - recomputed) for each partition.
with(df_pcons, summary(npco_cy - pcon_cy))
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -3.494e-08 -1.831e-08 -1.520e-08 -1.523e-08 -1.209e-08 -1.636e-11
with(df_pcons, summary(npco_er - pcon_er))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 4.878e-07 1.371e-04 1.566e-04 1.602e-04 1.787e-04 3.596e-04
with(df_pcons, summary(npco_tg - pcon_tg))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 3.687e-05 1.190e-04 1.385e-04 1.440e-04 1.630e-04 4.134e-04

# The same differences as three histogram layers on a shared axis, one layer
# per partition, each filled with that partition's colour.
ggplot(df_pcons) +
  geom_histogram(
    aes(x = npco_cy - pcon_cy),
    binwidth = 1e-5, position = "stack",
    fill = LOC_COLORS["CY"], color = "black"
  ) +
  geom_histogram(
    aes(x = npco_er - pcon_er),
    binwidth = 1e-5, position = "stack",
    fill = LOC_COLORS["ER"], color = "black"
  ) +
  geom_histogram(
    aes(x = npco_tg - pcon_tg),
    binwidth = 1e-5, position = "stack",
    fill = LOC_COLORS["TG"], color = "black"
  ) +
  scale_y_continuous(expand = c(0, 0, 0.05, 0)) +
  labs(x = "Difference (Reported - Computed)", y = "Genes") +
  theme_light()

They are small, but they are biased. Were the medians rounded prior to dividing?

# Medians of the raw coefficients, i.e., the divisors used when recomputing
# the normalized values.
median(df_pcons$pco_cy)
## [1] 0.3426
median(df_pcons$pco_er)
## [1] 0.3021473
median(df_pcons$pco_tg)
## [1] 0.3346464

Yes. This is consistent with the medians having been rounded to four decimal places prior to computing the normalized values. So, we should get exact matching with…

# Same recomputation as before, but dividing by the medians rounded to four
# decimal places.
df_pcons2 <- df_tiger %>%
  select(
    gene_name, refseq_id, category,
    pco_cy, pco_er, pco_tg,
    npco_cy, npco_er, npco_tg
  ) %>%
  mutate(
    pcon_cy = pco_cy / round(median(pco_cy), digits = 4),
    pcon_er = pco_er / round(median(pco_er), digits = 4),
    pcon_tg = pco_tg / round(median(pco_tg), digits = 4)
  )

# Reported vs. recomputed, one partition at a time.
with(df_pcons2, all.equal(npco_cy, pcon_cy))
## [1] TRUE
with(df_pcons2, all.equal(npco_er, pcon_er))
## [1] TRUE
with(df_pcons2, all.equal(npco_tg, pcon_tg))
## [1] TRUE

Perfect!

Consistent Categorization

We were told that categorization was based on the normalized partition coefficients. We’ll see how it matches up on the raw coefficients first, and then check that normalized coefficients properly assign the categories.

# Cross-tabulate the assigned category against the partition holding the
# largest RAW coefficient of each gene.
df_pcos %>%
  # The sum column also starts with "pco", so drop it before pivoting.
  select(-pco_sum) %>%
  pivot_longer(
    cols = starts_with("pco", ignore.case = FALSE),
    names_to = "partition",
    names_prefix = "pco_",
    values_to = "coefficient"
  ) %>%
  # Keep, per gene/transcript, the row(s) with the largest coefficient.
  group_by(gene_name, refseq_id) %>%
  slice_max(coefficient) %>%
  ungroup() %>%
  mutate(
    partition = factor(toupper(partition), levels = c("ER", "TG", "CY"))
  ) %>%
  tbl_summary(
    by = category,
    include = partition,
    label = c(category = "Category", partition = "Max Partition (raw)")
  ) %>%
  modify_header(label ~ "**Categorization**")
Categorization DF, N = 3,3691 ER, N = 9191 TG, N = 1,2461 CY, N = 1,4811
Max Partition (raw)
ER 435 (13%) 864 (94%) 0 (0%) 0 (0%)
TG 1,297 (38%) 51 (5.5%) 1,246 (100%) 0 (0%)
CY 1,637 (49%) 4 (0.4%) 0 (0%) 1,481 (100%)
1 n (%)

Okay, that’s a little noisy, but we know that the actual categories were assigned using the normalized coefficients. So, let’s look at them…

# Cross-tabulate the assigned category against the partition holding the
# largest NORMALIZED coefficient of each gene.
df_tiger %>%
  pivot_longer(
    cols = starts_with("npco_"),
    names_to = "partition",
    names_prefix = "npco_",
    values_to = "coefficient"
  ) %>%
  # Keep, per gene/transcript, the row(s) with the largest coefficient.
  group_by(gene_name, refseq_id) %>%
  slice_max(coefficient) %>%
  ungroup() %>%
  mutate(
    partition = factor(toupper(partition), levels = c("ER", "TG", "CY"))
  ) %>%
  tbl_summary(
    by = category,
    include = partition,
    label = c(category = "Category", partition = "Max Partition (norm.)")
  ) %>%
  modify_header(label ~ "**Categorization**")
Categorization DF, N = 3,3691 ER, N = 9191 TG, N = 1,2461 CY, N = 1,4811
Max Partition (norm.)
ER 1,104 (33%) 919 (100%) 0 (0%) 0 (0%)
TG 1,038 (31%) 0 (0%) 1,246 (100%) 0 (0%)
CY 1,227 (36%) 0 (0%) 0 (0%) 1,481 (100%)
1 n (%)

This shows consistency with maximum category.

Categorization

Let’s look at the categories visualized in ternary plots.

Raw Coefficients

The first plot shows a naïve 50% cutoff line. That is, any genes outside the central triangle have more than 50% of their FPKMs coming from one subcellular compartment. The second plot includes cutoff lines that are chosen to separate the classifications.

# Raw coefficients, coloured by category, with dashed cutoff lines at 0.50 on
# all three axes.
df_tiger %>%
  ggtern(aes(x = pco_er, y = pco_cy, z = pco_tg, color = category)) +
  geom_point(size = 0.2, alpha = 0.4) +
  geom_Lline(Lintercept = 0.50, linetype = "dashed") +
  geom_Rline(Rintercept = 0.50, linetype = "dashed") +
  geom_Tline(Tintercept = 0.50, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

# Same plot with per-axis cutoffs (L = 0.38, R = 0.42, T = 0.43).
df_tiger %>%
  ggtern(aes(x = pco_er, y = pco_cy, z = pco_tg, color = category)) +
  geom_point(size = 0.2, alpha = 0.4) +
  geom_Lline(Lintercept = 0.38, linetype = "dashed") +
  geom_Rline(Rintercept = 0.42, linetype = "dashed") +
  geom_Tline(Tintercept = 0.43, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

# No plot= argument: the most recent plot (the per-axis cutoff version) is
# the one written to disk.
ggsave("img/pcos_raw_ternary.pdf", width = 5, height = 5, dpi = 300)

There is some non-linearity in there, but one may not notice it if not looking closely. This is expected because we assign categories using the normalized coefficients.

Normalized Coefficients

Three plots show 50% cutoffs, 40% cutoffs, and empirical (best fit) cutoffs.

# Normalized coefficients, coloured by category, cutoffs at 0.5 on all axes.
df_tiger %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg, color = category)) +
  geom_point(size = 0.2, alpha = 0.4) +
  geom_Lline(Lintercept = 0.5, linetype = "dashed") +
  geom_Rline(Rintercept = 0.5, linetype = "dashed") +
  geom_Tline(Tintercept = 0.5, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

# Cutoffs at 0.4 on all axes.
df_tiger %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg, color = category)) +
  geom_point(size = 0.2, alpha = 0.4) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

# Per-axis cutoffs (L = 0.405, R = 0.40, T = 0.41).
df_tiger %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg, color = category)) +
  geom_point(size = 0.2, alpha = 0.4) +
  geom_Lline(Lintercept = 0.405, linetype = "dashed") +
  geom_Rline(Rintercept = 0.40, linetype = "dashed") +
  geom_Tline(Tintercept = 0.41, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

# No plot= argument: only the most recent plot (per-axis cutoffs) is saved.
ggsave("img/pcos_norm_ternary.pdf", width = 5, height = 5, dpi = 300)

Alternative Dot Sizes

# Variant 1 of the per-axis-cutoff plot: point size 0.4, alpha 0.4.
df_tiger %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg, color = category)) +
  geom_point(size = 0.4, alpha = 0.4) +
  geom_Lline(Lintercept = 0.405, linetype = "dashed") +
  geom_Rline(Rintercept = 0.40, linetype = "dashed") +
  geom_Tline(Tintercept = 0.41, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

ggsave("img/pcos_norm_ternary_alt1.pdf", width = 5, height = 5, dpi = 300)

# Variant 2: point size 0.8, alpha 0.6.
df_tiger %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg, color = category)) +
  geom_point(size = 0.8, alpha = 0.6) +
  geom_Lline(Lintercept = 0.405, linetype = "dashed") +
  geom_Rline(Rintercept = 0.40, linetype = "dashed") +
  geom_Tline(Tintercept = 0.41, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

ggsave("img/pcos_norm_ternary_alt2.pdf", width = 5, height = 5, dpi = 300)

Covariate Plots

Here we plot some of the covariates (e.g., CLIP peak counts) using the ternary plots. We scale both the point sizes and the colors based on the covariate values. This helps to deemphasize all those genes that have low values in favor of those with high values. Overall, I think the individual plots are not so clear to interpret; however, comparing the plots of different covariates seems to make for a nice contrast.

The ridge plots are more for modelling purposes, to explore what type of transformation might be best to apply before inputting the covariate into the regression model.

TIS11B

Ternary Plot

# Ternary plot of the normalized coefficients with point colour and point
# area both driven by TIS11B_CLIP (square-root colour scale).
# The previously computed `pscore_tis11b` column was never used by the plot
# and has been dropped.
df_tiger %>%
  # Ascending sort: rows with the largest values come last and are therefore
  # drawn on top of the low-value points.
  arrange(TIS11B_CLIP) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = TIS11B_CLIP, size = TIS11B_CLIP
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/tis11b_ternary.pdf", width = 8, height = 6, dpi = 300)

This is clearly non-cytoplasmic, with leaning toward the TG side.

Ridge Plots

# Name of the covariate column explored by the three ridge plots below.
RBP_CLIP <- "TIS11B_CLIP"

# Untransformed values, one ridge per category.
df_tiger %>%
  ggplot(aes(x = .data[[RBP_CLIP]], y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Raw Counts") +
  theme_bw()

# Centred square-root transform: 2 * (sqrt(x) - sqrt(mean(x))).
# The column is looked up through the .data pronoun, as in the plot above.
df_tiger %>%
  mutate(
    pscore = 2 * (
      sqrt(.data[[RBP_CLIP]]) - sqrt(mean(.data[[RBP_CLIP]], na.rm = TRUE))
    )
  ) %>%
  ggplot(aes(x = pscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Variance Stabilizing Transform") +
  theme_bw()

# Z-score. scale() returns a one-column matrix carrying centring/scaling
# attributes; as.numeric() flattens it so zscore is a plain numeric column.
df_tiger %>%
  mutate(zscore = as.numeric(scale(.data[[RBP_CLIP]]))) %>%
  ggplot(aes(x = zscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Z-Score") +
  theme_bw()

TIA1_L1

Ternary Plot

# Ternary plot of the normalized coefficients with point colour and point
# area both driven by TIA1_L1_CLIP (square-root colour scale).
# The previous version computed an unused column that was also misnamed
# `pscore_tis11b` (copied from the TIS11B chunk); it has been dropped.
df_tiger %>%
  # Ascending sort: rows with the largest values come last and are therefore
  # drawn on top of the low-value points.
  arrange(TIA1_L1_CLIP) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = TIA1_L1_CLIP, size = TIA1_L1_CLIP
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/tial1_ternary.pdf", width = 8, height = 6, dpi = 300)

Ridge Plot

# Name of the covariate column explored by the three ridge plots below.
RBP_CLIP <- "TIA1_L1_CLIP"

# Untransformed values, one ridge per category.
df_tiger %>%
  ggplot(aes(x = .data[[RBP_CLIP]], y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Raw Counts") +
  theme_bw()

# Centred square-root transform: 2 * (sqrt(x) - sqrt(mean(x))).
# The column is looked up through the .data pronoun, as in the plot above.
df_tiger %>%
  mutate(
    pscore = 2 * (
      sqrt(.data[[RBP_CLIP]]) - sqrt(mean(.data[[RBP_CLIP]], na.rm = TRUE))
    )
  ) %>%
  ggplot(aes(x = pscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Variance Stabilizing Transform") +
  theme_bw()

# Z-score. scale() returns a one-column matrix carrying centring/scaling
# attributes; as.numeric() flattens it so zscore is a plain numeric column.
df_tiger %>%
  mutate(zscore = as.numeric(scale(.data[[RBP_CLIP]]))) %>%
  ggplot(aes(x = zscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Z-Score") +
  theme_bw()

PUM2 CLIP

Ternary Plot

# Ternary plot of the normalized coefficients with point colour and point
# area both driven by PUM2_CLIP (square-root colour scale).
# The previous version computed an unused column that was also misnamed
# `pscore_tis11b` (copied from the TIS11B chunk); it has been dropped.
df_tiger %>%
  # Ascending sort: rows with the largest values come last and are therefore
  # drawn on top of the low-value points.
  arrange(PUM2_CLIP) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = PUM2_CLIP, size = PUM2_CLIP
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/pum2_clip_ternary.pdf", width = 8, height = 6, dpi = 300)

Ridge Plot

# Name of the covariate column explored by the three ridge plots below.
RBP_CLIP <- "PUM2_CLIP"

# Untransformed values, one ridge per category.
df_tiger %>%
  ggplot(aes(x = .data[[RBP_CLIP]], y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Raw Counts") +
  theme_bw()

# Centred square-root transform: 2 * (sqrt(x) - sqrt(mean(x))).
# The column is looked up through the .data pronoun, as in the plot above.
df_tiger %>%
  mutate(
    pscore = 2 * (
      sqrt(.data[[RBP_CLIP]]) - sqrt(mean(.data[[RBP_CLIP]], na.rm = TRUE))
    )
  ) %>%
  ggplot(aes(x = pscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Variance Stabilizing Transform") +
  theme_bw()

# Z-score. scale() returns a one-column matrix carrying centring/scaling
# attributes; as.numeric() flattens it so zscore is a plain numeric column.
df_tiger %>%
  mutate(zscore = as.numeric(scale(.data[[RBP_CLIP]]))) %>%
  ggplot(aes(x = zscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Z-Score") +
  theme_bw()

Pumilio num

Ternary Plot

# Ternary plot of the normalized coefficients with point colour and point
# area both driven by Pumilio_3UTR_num (square-root colour scale).
# The previously computed `pscore` column was never used by the plot and has
# been dropped.
df_tiger %>%
  # Ascending sort: rows with the largest values come last and are therefore
  # drawn on top of the low-value points.
  arrange(Pumilio_3UTR_num) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = Pumilio_3UTR_num,
    size = Pumilio_3UTR_num
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/pumilio_3utr_ternary.pdf", width = 8, height = 6, dpi = 300)

Density

# Ternary plot coloured and sized by pum_density, i.e., Pumilio_3UTR_num per
# 1000 units of Anno_3UTR_length.
# NOTE(review): presumably sites per kilobase of annotated 3'UTR -- confirm
# the unit of Anno_3UTR_length.
df_tiger %>%
  mutate(pum_density = 1000 * Pumilio_3UTR_num / Anno_3UTR_length) %>%
  # Ascending sort so the densest rows come last and are drawn on top.
  arrange(pum_density) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = pum_density,
    size = pum_density
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/pumilio_density_ternary.pdf", width = 8, height = 6, dpi = 300)

Ridge Plot

# Name of the covariate column explored by the three ridge plots below.
RBP_CLIP <- "Pumilio_3UTR_num"

# Untransformed values, one ridge per category.
df_tiger %>%
  ggplot(aes(x = .data[[RBP_CLIP]], y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Raw Counts") +
  theme_bw()

# Centred square-root transform: 2 * (sqrt(x) - sqrt(mean(x))).
# The column is looked up through the .data pronoun, as in the plot above.
df_tiger %>%
  mutate(
    pscore = 2 * (
      sqrt(.data[[RBP_CLIP]]) - sqrt(mean(.data[[RBP_CLIP]], na.rm = TRUE))
    )
  ) %>%
  ggplot(aes(x = pscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Variance Stabilizing Transform") +
  theme_bw()

# Z-score. scale() returns a one-column matrix carrying centring/scaling
# attributes; as.numeric() flattens it so zscore is a plain numeric column.
df_tiger %>%
  mutate(zscore = as.numeric(scale(.data[[RBP_CLIP]]))) %>%
  ggplot(aes(x = zscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Z-Score") +
  theme_bw()

Combination Plots

TIS11B and TIAL1

# Two-covariate ternary plot on a black panel.
#   layer 1: solid points (pch 16), colour black -> red by TIS11B_CLIP
#   layer 2: fillable points (pch 21), fill black -> green by TIA1_L1_CLIP
# Both layers size points by the sum of the two covariates, and both gradients
# use a log1p transform.
# The previously computed `pscore_tis11b` / `pscore_tial1` columns were never
# used by the plot and have been dropped.
df_tiger %>%
  # Order rows by the larger of the two covariates so the strongest points
  # come last and are drawn on top.
  arrange(pmax(TIS11B_CLIP, TIA1_L1_CLIP)) %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg)) +
  geom_point(
    aes(color = TIS11B_CLIP, size = TIS11B_CLIP + TIA1_L1_CLIP),
    alpha = 0.60, pch = 16
  ) +
  scale_color_gradient(low = "#000000", high = "#FF0000", trans = "log1p") +
  geom_point(
    aes(fill = TIA1_L1_CLIP, size = TIS11B_CLIP + TIA1_L1_CLIP),
    alpha = 0.12, pch = 21
  ) +
  scale_fill_gradient(low = "#000000", high = "#00FF00", trans = "log1p") +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_dark() +
  theme(panel.background = element_rect(fill = "#000000"))

ggsave("img/tis11b_tial1_ternary.pdf", width = 10, height = 8, dpi = 300)

TIS11B and HuR

# Two-covariate ternary plot on a black panel.
#   layer 1: solid points (pch 16), colour black -> red by TIS11B_CLIP
#   layer 2: fillable points (pch 21), fill black -> green by Muk_HuR_CLIP
# Both layers size points by the sum of the two covariates, and both gradients
# use a log1p transform.
# The previously computed `pscore_tis11b` / `pscore_hur` columns were never
# used by the plot and have been dropped.
df_tiger %>%
  # Order rows by the larger of the two covariates so the strongest points
  # come last and are drawn on top.
  arrange(pmax(TIS11B_CLIP, Muk_HuR_CLIP)) %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg)) +
  geom_point(
    aes(color = TIS11B_CLIP, size = TIS11B_CLIP + Muk_HuR_CLIP),
    alpha = 0.60, pch = 16
  ) +
  scale_color_gradient(low = "#000000", high = "#FF0000", trans = "log1p") +
  geom_point(
    aes(fill = Muk_HuR_CLIP, size = TIS11B_CLIP + Muk_HuR_CLIP),
    alpha = 0.15, pch = 21
  ) +
  scale_fill_gradient(low = "#000000", high = "#00FF00", trans = "log1p") +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_dark() +
  theme(panel.background = element_rect(fill = "#000000"))

ggsave("img/tis11b_hur_ternary.pdf", width = 10, height = 8, dpi = 300)

TIS11B and Pum1/2

# Two-covariate ternary plot on a black panel.
#   layer 1: solid points (pch 16), colour black -> red by TIS11B_CLIP (log1p)
#   layer 2: fillable points (pch 21), fill black -> green by PUM_RIP12
# Both layers size points by the sum of the two covariates.
# The previously computed `pscore_tis11b` / `pscore_pum12` columns were never
# used by the plot and have been dropped.
df_tiger %>%
  # Order rows by the larger of the two covariates so the strongest points
  # come last and are drawn on top.
  arrange(pmax(TIS11B_CLIP, PUM_RIP12)) %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg)) +
  geom_point(
    aes(color = TIS11B_CLIP, size = TIS11B_CLIP + PUM_RIP12),
    alpha = 0.60, pch = 16
  ) +
  scale_color_gradient(low = "#000000", high = "#FF0000", trans = "log1p") +
  geom_point(
    aes(fill = PUM_RIP12, size = TIS11B_CLIP + PUM_RIP12),
    alpha = 0.15, pch = 21
  ) +
  # NOTE(review): the fill scale is untransformed here, whereas the sibling
  # combination plots use log1p for both scales -- confirm this is intended
  # for PUM_RIP12.
  scale_fill_gradient(low = "#000000", high = "#00FF00", trans = "identity") +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_dark() +
  theme(panel.background = element_rect(fill = "#000000"))

ggsave("img/tis11b_pum12_ternary.pdf", width = 10, height = 8, dpi = 300)

Conclusion

Data looks very promising to predict partition coefficients from covariate values. Ridge peaks indicate the variance stabilizing transform is very effective. We may want to combine the VST with a z-score to create model coefficients that are directly comparable.


Runtime Details

Session Info

## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/mfansler/miniconda3/envs/brms_r41/lib/libopenblasp-r0.3.20.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] cowplot_1.1.1   readxl_1.4.0    gtsummary_1.6.1 ggridges_0.5.3 
##  [5] ggtern_3.3.5    forcats_0.5.1   stringr_1.4.0   dplyr_1.0.9    
##  [9] purrr_0.3.4     readr_2.1.2     tidyr_1.2.0     tibble_3.1.7   
## [13] ggplot2_3.3.6   tidyverse_1.3.1 magrittr_2.0.3 
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.3          sass_0.4.1          viridisLite_0.4.0  
##  [4] jsonlite_1.8.0      modelr_0.1.8        bslib_0.3.1        
##  [7] assertthat_0.2.1    highr_0.9           tensorA_0.36.2     
## [10] cellranger_1.1.0    yaml_2.3.5          robustbase_0.95-0  
## [13] pillar_1.7.0        backports_1.4.1     lattice_0.20-45    
## [16] glue_1.6.2          digest_0.6.29       checkmate_2.1.0    
## [19] rvest_1.0.2         colorspace_2.0-3    htmltools_0.5.2    
## [22] plyr_1.8.7          pkgconfig_2.0.3     broom_0.8.0        
## [25] haven_2.5.0         scales_1.2.0        tzdb_0.3.0         
## [28] farver_2.1.0        generics_0.1.2      ellipsis_0.3.2     
## [31] withr_2.5.0         cli_3.3.0           proto_1.0.0        
## [34] crayon_1.5.1        evaluate_0.15       fs_1.5.2           
## [37] fansi_1.0.3         broom.helpers_1.7.0 MASS_7.3-57        
## [40] xml2_1.3.3          compositions_2.0-4  tools_4.1.3        
## [43] hms_1.1.1           lifecycle_1.0.1     munsell_0.5.0      
## [46] reprex_2.0.1        compiler_4.1.3      jquerylib_0.1.4    
## [49] rlang_1.0.2         grid_4.1.3          gt_0.6.0           
## [52] rstudioapi_0.13     labeling_0.4.2      rmarkdown_2.14     
## [55] gtable_0.3.0        DBI_1.1.3           R6_2.5.1           
## [58] bayesm_3.1-4        gridExtra_2.3       lubridate_1.8.0    
## [61] knitr_1.39          fastmap_1.1.0       utf8_1.2.2         
## [64] commonmark_1.8.0    latex2exp_0.9.4     stringi_1.7.6      
## [67] Rcpp_1.0.8.3        vctrs_0.4.1         DEoptimR_1.0-11    
## [70] dbplyr_2.2.0        tidyselect_1.1.2    xfun_0.31

Conda Environment

## Conda Environment YAML
name: brms_r41
channels:
  - merv
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - _r-mutex=1.0.1=anacondar_1
  - bwidget=1.9.14=h694c41f_1
  - bzip2=1.0.8=h0d85af4_4
  - c-ares=1.18.1=h0d85af4_0
  - ca-certificates=2022.6.15=h033912b_0
  - cairo=1.16.0=h9e0e54b_1010
  - cctools_osx-64=973.0.1=h3eff9a4_10
  - clang=14.0.4=h694c41f_0
  - clang-14=14.0.4=default_h55ffa42_0
  - clang_osx-64=14.0.4=h3a95cd4_2
  - clangxx=14.0.4=default_h55ffa42_0
  - clangxx_osx-64=14.0.4=he1dbc44_2
  - compiler-rt=14.0.4=h7fcd477_0
  - compiler-rt_osx-64=14.0.4=h6df654d_0
  - curl=7.83.1=h23f1065_0
  - expat=2.4.8=h96cf925_0
  - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
  - font-ttf-inconsolata=3.000=h77eed37_0
  - font-ttf-source-code-pro=2.038=h77eed37_0
  - font-ttf-ubuntu=0.83=hab24e00_0
  - fontconfig=2.14.0=h676cef8_0
  - fonts-conda-ecosystem=1=0
  - fonts-conda-forge=1=0
  - freetype=2.10.4=h4cff582_1
  - fribidi=1.0.10=hbcb3906_0
  - gettext=0.19.8.1=hd1a6beb_1008
  - gfortran_impl_osx-64=9.3.0=h9cc0e5e_23
  - gfortran_osx-64=9.3.0=h18f7dce_15
  - gmp=6.2.1=h2e338ed_0
  - graphite2=1.3.13=h2e338ed_1001
  - gsl=2.7=h93259b0_0
  - harfbuzz=4.2.0=h48644e2_0
  - icu=69.1=he49afe7_0
  - isl=0.22.1=hb1e8313_2
  - jpeg=9e=h5eb16cf_1
  - krb5=1.19.3=hb98e516_0
  - ld64_osx-64=609=h1e06c2b_10
  - lerc=3.0=he49afe7_0
  - libblas=3.9.0=15_osx64_openblas
  - libcblas=3.9.0=15_osx64_openblas
  - libclang-cpp14=14.0.4=default_h55ffa42_0
  - libcurl=7.83.1=h23f1065_0
  - libcxx=14.0.5=hce7ea42_1
  - libdeflate=1.12=hac89ed1_0
  - libedit=3.1.20191231=h0678c8f_2
  - libev=4.33=haf1e3a3_1
  - libffi=3.4.2=h0d85af4_5
  - libgfortran=5.0.0=9_3_0_h6c81a4c_23
  - libgfortran-devel_osx-64=9.3.0=h6c81a4c_23
  - libgfortran5=9.3.0=h6c81a4c_23
  - libglib=2.70.2=hf1fb8c0_4
  - libiconv=1.16=haf1e3a3_0
  - liblapack=3.9.0=15_osx64_openblas
  - libllvm13=13.0.1=h64f94b2_2
  - libllvm14=14.0.4=h41df66c_0
  - libnghttp2=1.47.0=hca56917_0
  - libopenblas=0.3.20=openmp_hb3cd9ec_0
  - libpng=1.6.37=h7cec526_2
  - libssh2=1.10.0=hd3787cc_2
  - libtiff=4.4.0=h9847915_1
  - libv8=8.9.83=h5fe4d7b_1
  - libwebp-base=1.2.2=h0d85af4_1
  - libxml2=2.9.12=h7e28ab6_1
  - libzlib=1.2.12=hfe4f2af_1
  - llvm-openmp=14.0.4=ha654fa7_0
  - llvm-tools=14.0.4=h41df66c_0
  - lz4-c=1.9.3=he49afe7_1
  - make=4.3=h22f3db7_1
  - mpc=1.2.1=hbb51d92_0
  - mpfr=4.1.0=h0f52abe_1
  - ncurses=6.3=h96cf925_1
  - nlopt=2.7.1=py310hdbd82a6_1
  - numpy=1.22.4=py310hed37afb_0
  - openssl=3.0.4=hfe4f2af_2
  - pandoc=2.18=h694c41f_0
  - pango=1.50.7=hc4a7b6d_0
  - pcre=8.45=he49afe7_0
  - pcre2=10.37=ha16e1b2_0
  - pip=22.1.2=pyhd8ed1ab_0
  - pixman=0.40.0=hbcb3906_0
  - python=3.10.5=hdd68b96_0_cpython
  - python_abi=3.10=2_cp310
  - r-abind=1.4_5=r41hc72bb7e_1003
  - r-askpass=1.1=r41h28b5c78_2
  - r-assertthat=0.2.1=r41hc72bb7e_2
  - r-backports=1.4.1=r41h28b5c78_0
  - r-base=4.1.3=h56d3809_0
  - r-base64enc=0.1_3=r41h28b5c78_1004
  - r-bayesm=3.1_4=r41he5a6823_2
  - r-bayesplot=1.9.0=r41hc72bb7e_0
  - r-bh=1.78.0_0=r41hc72bb7e_0
  - r-bit=4.0.4=r41h28b5c78_0
  - r-bit64=4.0.5=r41h28b5c78_0
  - r-bitops=1.0_7=r41h28b5c78_0
  - r-blob=1.2.3=r41hc72bb7e_0
  - r-boot=1.3_28=r41hc72bb7e_0
  - r-bridgesampling=1.1_2=r41hc72bb7e_0
  - r-brio=1.1.3=r41h28b5c78_0
  - r-brms=2.17.0=r41hc4bb905_0
  - r-brobdingnag=1.2_7=r41hc72bb7e_0
  - r-broom=0.8.0=r41hc72bb7e_0
  - r-broom.helpers=1.7.0=r41hc72bb7e_0
  - r-bslib=0.3.1=r41hc72bb7e_0
  - r-cachem=1.0.6=r41h28b5c78_0
  - r-callr=3.7.0=r41hc72bb7e_0
  - r-caret=6.0_92=r41h0f1d5c4_0
  - r-cellranger=1.1.0=r41hc72bb7e_1004
  - r-checkmate=2.1.0=r41h0f1d5c4_0
  - r-class=7.3_20=r41h28b5c78_0
  - r-cli=3.3.0=r41h8619c4b_0
  - r-clipr=0.8.0=r41hc72bb7e_0
  - r-coda=0.19_4=r41hc72bb7e_0
  - r-codetools=0.2_18=r41hc72bb7e_0
  - r-colorspace=2.0_3=r41h0f1d5c4_0
  - r-colourpicker=1.1.1=r41hc72bb7e_0
  - r-commonmark=1.8.0=r41h0f1d5c4_0
  - r-compositions=2.0_4=r41h28b5c78_0
  - r-cowplot=1.1.1=r41hc72bb7e_0
  - r-cpp11=0.4.2=r41hc72bb7e_0
  - r-crayon=1.5.1=r41hc72bb7e_0
  - r-crosstalk=1.2.0=r41hc72bb7e_0
  - r-curl=4.3.2=r41h28b5c78_0
  - r-data.table=1.14.2=r41ha76789d_0
  - r-dbi=1.1.3=r41hc72bb7e_0
  - r-dbplyr=2.2.0=r41hc72bb7e_0
  - r-deoptimr=1.0_11=r41hc72bb7e_0
  - r-desc=1.4.1=r41hc72bb7e_0
  - r-diffobj=0.3.5=r41h28b5c78_0
  - r-digest=0.6.29=r41h9951f98_0
  - r-dirichletreg=0.7_1=r41h28b5c78_0
  - r-distributional=0.3.0=r41hc72bb7e_0
  - r-dplyr=1.0.9=r41h8619c4b_0
  - r-dt=0.23=r41hc72bb7e_0
  - r-dtplyr=1.2.1=r41hc72bb7e_0
  - r-dygraphs=1.1.1.6=r41hc72bb7e_1003
  - r-e1071=1.7_11=r41h8619c4b_0
  - r-ellipsis=0.3.2=r41h28b5c78_0
  - r-evaluate=0.15=r41hc72bb7e_0
  - r-fansi=1.0.3=r41h0f1d5c4_0
  - r-farver=2.1.0=r41h9951f98_0
  - r-fastmap=1.1.0=r41h9951f98_0
  - r-fontawesome=0.2.2=r41hc72bb7e_0
  - r-forcats=0.5.1=r41hc72bb7e_0
  - r-foreach=1.5.2=r41hc72bb7e_0
  - r-formula=1.2_4=r41hc72bb7e_0
  - r-fs=1.5.2=r41hc4bb905_1
  - r-future=1.26.1=r41hc72bb7e_0
  - r-future.apply=1.9.0=r41hc72bb7e_0
  - r-gamm4=0.2_6=r41hc72bb7e_1
  - r-gargle=1.2.0=r41hc72bb7e_0
  - r-generics=0.1.2=r41hc72bb7e_0
  - r-ggplot2=3.3.6=r41hc72bb7e_0
  - r-ggrepel=0.9.1=r41h9951f98_0
  - r-ggridges=0.5.3=r41hc72bb7e_0
  - r-ggtern=3.3.5=r41hc72bb7e_0
  - r-globals=0.15.0=r41hc72bb7e_0
  - r-glue=1.6.2=r41h0f1d5c4_0
  - r-googledrive=2.0.0=r41hc72bb7e_0
  - r-googlesheets4=1.0.0=r41h785f33e_0
  - r-gower=1.0.0=r41h28b5c78_0
  - r-gridextra=2.3=r41hc72bb7e_1003
  - r-gt=0.6.0=r41hc72bb7e_0
  - r-gtable=0.3.0=r41hc72bb7e_3
  - r-gtools=3.9.2.2=r41h67d6963_0
  - r-gtsummary=1.6.1=r41hc72bb7e_0
  - r-hardhat=1.1.0=r41hc72bb7e_0
  - r-haven=2.5.0=r41h8619c4b_0
  - r-hexbin=1.28.2=r41h8e0a2a9_0
  - r-highr=0.9=r41hc72bb7e_0
  - r-hms=1.1.1=r41hc72bb7e_0
  - r-htmltools=0.5.2=r41h9951f98_0
  - r-htmlwidgets=1.5.4=r41hc72bb7e_0
  - r-httpuv=1.6.5=r41h9951f98_0
  - r-httr=1.4.3=r41hc72bb7e_0
  - r-ids=1.0.1=r41hc72bb7e_1
  - r-igraph=1.3.0=r41hd51be07_0
  - r-inline=0.3.19=r41hc72bb7e_0
  - r-ipred=0.9_13=r41h67d6963_0
  - r-isoband=0.2.5=r41h9951f98_0
  - r-iterators=1.0.14=r41hc72bb7e_0
  - r-jquerylib=0.1.4=r41hc72bb7e_0
  - r-jsonlite=1.8.0=r41h0f1d5c4_0
  - r-kernsmooth=2.23_20=r41he19034d_0
  - r-knitr=1.39=r41hc72bb7e_0
  - r-labeling=0.4.2=r41hc72bb7e_1
  - r-labelled=2.9.1=r41hc72bb7e_0
  - r-later=1.2.0=r41h9951f98_0
  - r-latex2exp=0.9.4=r41hc72bb7e_0
  - r-lattice=0.20_45=r41h28b5c78_0
  - r-lava=1.6.10=r41hc72bb7e_0
  - r-lazyeval=0.2.2=r41h28b5c78_2
  - r-lifecycle=1.0.1=r41hc72bb7e_0
  - r-listenv=0.8.0=r41hc72bb7e_1
  - r-lme4=1.1_29=r41hc4bb905_0
  - r-loo=2.5.1=r41hc72bb7e_0
  - r-lubridate=1.8.0=r41h9951f98_0
  - r-magrittr=2.0.3=r41h0f1d5c4_0
  - r-markdown=1.1=r41h28b5c78_1
  - r-mass=7.3_57=r41h67d6963_0
  - r-matrix=1.4_1=r41ha2825d1_0
  - r-matrixstats=0.62.0=r41h0f1d5c4_0
  - r-maxlik=1.5_2=r41hc72bb7e_0
  - r-mervdown=0.1.1=r41_0
  - r-mgcv=1.8_40=r41h60b693f_0
  - r-mime=0.12=r41h28b5c78_0
  - r-miniui=0.1.1.1=r41hc72bb7e_1002
  - r-minqa=1.2.4=r41h1c00d0a_1006
  - r-misctools=0.6_26=r41hc72bb7e_1
  - r-modelmetrics=1.2.2.2=r41hff6cd7b_1
  - r-modelr=0.1.8=r41hc72bb7e_0
  - r-munsell=0.5.0=r41hc72bb7e_1004
  - r-mvtnorm=1.1_3=r41h749f5a1_0
  - r-nleqslv=3.3.2=r41h749f5a1_1006
  - r-nlme=3.1_158=r41he3b5f32_0
  - r-nloptr=2.0.3=r41h4294f1f_0
  - r-nnet=7.3_17=r41h28b5c78_0
  - r-numderiv=2016.8_1.1=r41hc72bb7e_3
  - r-openssl=2.0.2=r41he24a83a_0
  - r-packrat=0.8.0=r41hc72bb7e_0
  - r-parallelly=1.32.0=r41hc72bb7e_0
  - r-pillar=1.7.0=r41hc72bb7e_0
  - r-pkgbuild=1.3.1=r41hc72bb7e_0
  - r-pkgconfig=2.0.3=r41hc72bb7e_1
  - r-pkgload=1.2.4=r41h9951f98_0
  - r-plyr=1.8.7=r41hc4bb905_0
  - r-posterior=1.2.2=r41hc72bb7e_0
  - r-praise=1.0.0=r41hc72bb7e_1005
  - r-prettyunits=1.1.1=r41hc72bb7e_1
  - r-proc=1.18.0=r41h9951f98_0
  - r-processx=3.6.1=r41h67d6963_0
  - r-prodlim=2019.11.13=r41h9951f98_1
  - r-progress=1.2.2=r41hc72bb7e_2
  - r-progressr=0.10.1=r41hc72bb7e_0
  - r-projpred=2.1.2=r41h8619c4b_0
  - r-promises=1.2.0.1=r41h9951f98_0
  - r-proto=1.0.0=r41ha770c72_2003
  - r-proxy=0.4_27=r41h67d6963_0
  - r-ps=1.7.1=r41h67d6963_0
  - r-purrr=0.3.4=r41h28b5c78_1
  - r-r6=2.5.1=r41hc72bb7e_0
  - r-randomforest=4.7_1.1=r41he3b5f32_0
  - r-rappdirs=0.3.3=r41h28b5c78_0
  - r-rcolorbrewer=1.1_3=r41h785f33e_0
  - r-rcpp=1.0.8.3=r41hc4bb905_0
  - r-rcpparmadillo=0.11.2.0.0=r41h3fca91c_0
  - r-rcppeigen=0.3.3.9.2=r41hde7ee74_0
  - r-rcppparallel=5.1.5=r41h9951f98_0
  - r-readr=2.1.2=r41h9951f98_0
  - r-readxl=1.4.0=r41h45e8629_0
  - r-recipes=0.2.0=r41hc72bb7e_0
  - r-rematch=1.0.1=r41hc72bb7e_1004
  - r-rematch2=2.1.2=r41hc72bb7e_1
  - r-reprex=2.0.1=r41hc72bb7e_0
  - r-reshape2=1.4.4=r41h9951f98_1
  - r-rlang=1.0.2=r41hc4bb905_0
  - r-rmarkdown=2.14=r41hc72bb7e_0
  - r-robustbase=0.95_0=r41h6f100c1_0
  - r-rpart=4.1.16=r41h28b5c78_0
  - r-rprojroot=2.0.3=r41hc72bb7e_0
  - r-rsconnect=0.8.26=r41hc72bb7e_0
  - r-rstan=2.21.5=r41hc4bb905_0
  - r-rstantools=2.2.0=r41hc4bb905_0
  - r-rstudioapi=0.13=r41hc72bb7e_0
  - r-rvest=1.0.2=r41hc72bb7e_0
  - r-sandwich=3.0_2=r41hc72bb7e_0
  - r-sass=0.4.1=r41hc4bb905_0
  - r-scales=1.2.0=r41hc72bb7e_0
  - r-selectr=0.4_2=r41hc72bb7e_1
  - r-shiny=1.7.1=r41h785f33e_0
  - r-shinyjs=2.1.0=r41hc72bb7e_0
  - r-shinystan=2.6.0=r41hc72bb7e_0
  - r-shinythemes=1.2.0=r41hc72bb7e_0
  - r-sourcetools=0.1.7=r41h9951f98_1002
  - r-squarem=2021.1=r41hc72bb7e_0
  - r-stanheaders=2.21.0_7=r41h1c00d0a_0
  - r-statmod=1.4.36=r41h0661a58_0
  - r-stringi=1.7.6=r41h99ba7f4_1
  - r-stringr=1.4.0=r41hc72bb7e_2
  - r-survival=3.3_1=r41h0f1d5c4_0
  - r-sys=3.4=r41h28b5c78_0
  - r-tensora=0.36.2=r41h28b5c78_0
  - r-testthat=3.1.4=r41h8619c4b_0
  - r-threejs=0.3.3=r41hc72bb7e_1
  - r-tibble=3.1.7=r41h67d6963_0
  - r-tidyr=1.2.0=r41h9951f98_0
  - r-tidyselect=1.1.2=r41hbe3e9c8_0
  - r-tidyverse=1.3.1=r41hc72bb7e_0
  - r-timedate=3043.102=r41hc72bb7e_1002
  - r-tinytex=0.40=r41hc72bb7e_0
  - r-tzdb=0.3.0=r41hc4bb905_0
  - r-utf8=1.2.2=r41h28b5c78_0
  - r-uuid=1.1_0=r41h0f1d5c4_0
  - r-v8=4.2.0=r41he06a7a2_0
  - r-vctrs=0.4.1=r41hc4bb905_0
  - r-viridislite=0.4.0=r41hc72bb7e_0
  - r-vroom=1.5.7=r41h9951f98_0
  - r-waldo=0.4.0=r41hc72bb7e_0
  - r-withr=2.5.0=r41hc72bb7e_0
  - r-writexl=1.4.0=r41h28b5c78_0
  - r-xfun=0.31=r41h8619c4b_0
  - r-xml2=1.3.3=r41h9951f98_0
  - r-xtable=1.8_4=r41hc72bb7e_3
  - r-xts=0.12.1=r41h0f1d5c4_0
  - r-yaml=2.3.5=r41h0f1d5c4_0
  - r-zoo=1.8_10=r41h0f1d5c4_0
  - readline=8.1.2=h3899abd_0
  - setuptools=62.6.0=py310h2ec42d9_0
  - sigtool=0.1.3=h88f4db0_0
  - sqlite=3.38.5=hd9f0692_0
  - tapi=1100.0.11=h9ce4665_0
  - tk=8.6.12=h5dbffcc_0
  - tktable=2.10=h49f0cf7_3
  - tzdata=2022a=h191b570_0
  - wheel=0.37.1=pyhd8ed1ab_0
  - xz=5.2.5=haf1e3a3_1
  - zlib=1.2.12=hfe4f2af_1
  - zstd=1.5.2=ha9df2e0_1
prefix: /Users/mfansler/miniconda3/envs/brms_r41